\relax 
\citation{BurgerGK96,Agarwal00,Dally-DAC}
\citation{MRAM:HYY+05,MRAM:KTM+07}
\citation{xie-jtc-2006,3D-micro}
\citation{xydong-dac}
\citation{gsun-hpca}
\citation{xydong-dac,gsun-hpca}
\citation{MRAM:HYY+05,MRAM:ZBM+06}
\@writefile{toc}{\contentsline {section}{\numberline {1}\hskip -1em.\nobreakspace  {}Introduction}{1}}
\newlabel{sec:intro}{{1}{1}}
\citation{MRAM:ICCAD09:Zhou}
\citation{pcm-date-2010}
\citation{gsun-hpca}
\citation{Peh-dally,evc,topology-hpca,dragonfly,rca-hpca}
\citation{xu-radix,gsun-hpca,mira}
\citation{gsun-hpca}
\citation{MRAM:HYY+05,MRAM:KTM+07}
\citation{MRAM:HYY+05,MRAM:KTM+07}
\citation{Dally-DAC}
\citation{Peh-dally}
\@writefile{toc}{\contentsline {section}{\numberline {2}\hskip -1em.\nobreakspace  {}Background}{4}}
\newlabel{sec:background}{{2}{4}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  \bf  MTJ structure and STT-RAM cell (a) Anti-parallel (high resistance), indicating ``1'' state (b) Parallel (low resistance), indicating ``0'' state (c) STT-RAM Structural view (d) STT-RAM Schematic.}}{4}}
\newlabel{fig:mram}{{1}{4}}
\citation{3d-hpca-2010}
\citation{picoserver,3D-micro,madan-hpca}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces  \relax \fontsize  {8}{9.5}\selectfont  \bf  (a) Example request sequence at R0 in a 2x2 mesh topology; arbitration sequence using (b) simple round-robin arbiter and (c) a STT-RAM bank aware arbiter.}}{5}}
\newlabel{fig:motiv}{{2}{5}}
\@writefile{toc}{\contentsline {section}{\numberline {3}\hskip -1em.\nobreakspace  {}STT-RAM Aware NoC Design}{5}}
\newlabel{sec:arch_details}{{3}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}\hskip -1em.\nobreakspace  {}A Case for STT-RAM aware router arbitration}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}\hskip -1em.\nobreakspace  {}Our proposal: Re-ordering accesses to STT-RAM banks}{6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Cache access distribution}{7}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  \bf  Plots showing the distribution of consecutive accesses to STT-RAM banks in different applications following a write access(the last column shows the average across the whole benchmark suite). The horizontal axis represents the access latencies in cycles and the vertical axis represents the percentage of access. The inset in each plot mentions the average number of request-packets in a router in the cache layer to 2-hop away STT-RAM destination.}}{8}}
\newlabel{fig:mram_access}{{3}{8}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}\hskip -1em.\nobreakspace  {}Facilitating prioritization}{8}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  \bf  Two layers of the 3D CMP: (a) Core Layer (b) Cache Layer (c) Cache layer showing the child nodes of parent nodes.}}{9}}
\newlabel{flat-3D}{{4}{9}}
\citation{topology-hpca}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  \bf  Proposed 3D architecture with cores in the top layer and STT-RAM banks (partitioned into 4 logical regions) in the bottom layer. The bold arrows show the route taken by requests from core to cache bank.}}{10}}
\newlabel{fig:3D}{{5}{10}}
\citation{rca-hpca}
\citation{rca-hpca}
\citation{rca-hpca}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.5}Estimation of busy time}{11}}
\newlabel{sec:busy_time}{{3.5}{11}}
\citation{sparc}
\citation{cacti-6}
\@writefile{toc}{\contentsline {section}{\numberline {4}\hskip -1em.\nobreakspace  {}Experimental Evaluation}{12}}
\newlabel{sec:exp_eval}{{4}{12}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}\hskip -1em.\nobreakspace  {}Experimental Platform}{12}}
\newlabel{sec:exp_platform}{{4.1}{12}}
\citation{Peh-dally}
\citation{Wang-orion}
\citation{xydong-dac,gsun-hpca}
\citation{MRAM:HYY+05}
\citation{MRAM:ZBM+06}
\citation{MRAM:HYY+05}
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  {\bf  Baseline processor, cache, memory and network configuration}}}{13}}
\newlabel{table:sim_config}{{1}{13}}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  {\bf  SRAM and STT-RAM comparison at 32nm}}}{13}}
\newlabel{table:comparison}{{2}{13}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}\hskip -1em.\nobreakspace  {}Experimental results}{13}}
\newlabel{sec:exp_results}{{4.2}{13}}
\citation{allan-asplos}
\citation{speedup}
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  {\bf  TSB parameters at 32nm}}}{14}}
\newlabel{table:tsb}{{3}{14}}
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  {\bf  Benchmark Table.} l1mpki: L1 misses per 1000 instructions, l2mpki: L2 misses per 1000 instructions, l2wpki: L2 writes per 1000 instructions l2rpki: L2 reads per 1000 instructions, Bursty: (High/Low) based on latency between 2 consecutive requests to a L2 bank.}}{14}}
\newlabel{table:benchmark}{{4}{14}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  \bf  System throughput of the benchmarks normalized to the SRAM-64TSB case (from top to bottom: IPC for server and PARSEC benchmarks and instruction throughput with multi-programmed SPEC-2006 ) . }}{15}}
\newlabel{fig:ipc}{{6}{15}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  \bf  Packet latency breakdown into network latency (net lat) and queuing latency at memory banks (queue lat). SRAM-64TSB are exact percentages. All other values are normalized to that of SRAM-64TSB.}}{15}}
\newlabel{fig:lat_breakdown}{{7}{15}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  \bf  Energy of the benchmarks normalized to that of SRAM-64TSB. }}{16}}
\newlabel{fig:energy}{{8}{16}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces \relax \fontsize  {10}{12}\selectfont  \abovedisplayskip 10\p@ plus2\p@ minus5\p@ \abovedisplayshortskip \z@ plus3\p@ \belowdisplayshortskip 6\p@ plus3\p@ minus3\p@ \def \leftmargin \leftmargini \parsep 4.5\p@ plus2\p@ minus\p@ \topsep 9\p@ plus3\p@ minus5\p@ \itemsep 4.5\p@ plus2\p@ minus\p@ {\leftmargin \leftmargini \topsep 6\p@ plus2\p@ minus2\p@ \parsep 3\p@ plus2\p@ minus\p@ \itemsep \parsep }\belowdisplayskip \abovedisplayskip \bf  Weighted speedup (WS) and instruction throughput (IT) for the multiprogrammed workloads.}}{18}}
\newlabel{fig:case_multiprogrammed}{{9}{18}}
\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces \relax \fontsize  {10}{12}\selectfont  \abovedisplayskip 10\p@ plus2\p@ minus5\p@ \abovedisplayshortskip \z@ plus3\p@ \belowdisplayshortskip 6\p@ plus3\p@ minus3\p@ \def \leftmargin \leftmargini \parsep 4.5\p@ plus2\p@ minus\p@ \topsep 9\p@ plus3\p@ minus5\p@ \itemsep 4.5\p@ plus2\p@ minus\p@ {\leftmargin \leftmargini \topsep 6\p@ plus2\p@ minus2\p@ \parsep 3\p@ plus2\p@ minus\p@ \itemsep \parsep }\belowdisplayskip \abovedisplayskip \bf  Maximum slowdown of the apps. in Case-2.}}{18}}
\newlabel{fig:max_slow_multiprogrammed}{{10}{18}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}\hskip -1em.\nobreakspace  {}Sensitivity analysis}{18}}
\newlabel{sec:sensitivity_anyl}{{4.3}{18}}
\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  \bf  Illustration showing the various placement of TSBs and the sub-division of cache layer into regions. The yellow cells denote TSBs and the rest of the colored cells denote STT-RAM cache banks.}}{19}}
\newlabel{fig:tsb-placement}{{11}{19}}
\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  \bf  Sensitivity to placement of TSBs and number of cache regions.}}{19}}
\newlabel{fig:senst-tsb}{{12}{19}}
\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  \bf  Average number of request-packets in a router in the cache layer to 1-hop, 2-hop and 3-hop away STT-RAM destination and sensitivity to hop distance.}}{19}}
\newlabel{fig:senst-hop}{{13}{19}}
\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces \relax \fontsize  {8}{9.5}\selectfont  \bf  Latency reduction comparison with write-buffering (normalized to STT-RAM with no write-buffering).}}{19}}
\newlabel{fig:write-buff}{{14}{19}}
\citation{gsun-hpca}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}\hskip -1em.\nobreakspace  {}Comparison with a SRAM write buffer}{20}}
\newlabel{sec:write_buff}{{4.4}{20}}
\citation{MRAM:DLK+02,MRAM:DKB02}
\citation{xydong-dac}
\citation{gsun-hpca}
\citation{MRAM:ICCAD09:Zhou}
\citation{gsun-hpca}
\@writefile{toc}{\contentsline {section}{\numberline {5}\hskip -1em.\nobreakspace  {}Related Work}{21}}
\newlabel{sec:prior}{{5}{21}}
\@writefile{toc}{\contentsline {section}{\numberline {6}\hskip -1em.\nobreakspace  {}Conclusions}{21}}
\newlabel{conclusion}{{6}{21}}
\bibstyle{latex8}
\bibdata{MICRO,MRAM}
\bibcite{Agarwal00}{1}
\bibcite{3D-micro}{2}
\bibcite{BurgerGK96}{3}
\bibcite{Dally-DAC}{4}
\bibcite{topology-hpca}{5}
\bibcite{MRAM:DKB02}{6}
\bibcite{MRAM:DLK+02}{7}
\bibcite{xydong-dac}{8}
\bibcite{speedup}{9}
\bibcite{rca-hpca}{10}
\bibcite{MRAM:HYY+05}{11}
\bibcite{pcm-date-2010}{12}
\bibcite{MRAM:KTM+07}{13}
\bibcite{picoserver}{14}
\bibcite{dragonfly}{15}
\bibcite{evc}{16}
\bibcite{madan-hpca}{17}
\bibcite{cacti-6}{18}
\bibcite{mira}{19}
\bibcite{Peh-dally}{20}
\bibcite{allan-asplos}{21}
\bibcite{gsun-hpca}{22}
\bibcite{sparc}{23}
\bibcite{Wang-orion}{24}
\bibcite{3d-hpca-2010}{25}
\bibcite{xie-jtc-2006}{26}
\bibcite{xu-radix}{27}
\bibcite{MRAM:ZBM+06}{28}
\bibcite{MRAM:ICCAD09:Zhou}{29}
